
# https://movie.douban.com/
# https://movie.douban.com/typerank?type_name=%E5%96%9C%E5%89%A7&type=24&interval_id=100:90&action=
# https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=0&limit=20

import os
import urllib.parse
import urllib.request
from wsgiref import headers


def douban(page, page_size=20):
    """Build the request for one page of the Douban ranking JSON endpoint.

    Args:
        page: 1-based page number.
        page_size: Number of entries requested per page (defaults to 20).

    Returns:
        A ``urllib.request.Request`` whose query string carries
        ``start=(page - 1) * page_size`` and ``limit=page_size``.
    """
    base_url = 'https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&'
    # Named ``request_headers`` so it does not shadow the module-level
    # ``headers`` import.
    # NOTE(review): the cookie looks like a captured browser session and
    # will presumably expire - confirm and refresh when requests start failing.
    request_headers = {
        'cookie':'ll="118123"; bid=Fd_4LIIejz0; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1737533747%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DY5UfsvfsQHRdndGjd-Zg37hR-OCLlpUaUN1J-kde7rM-3gtRx0aPM-KXV2emtqZC%26wd%3D%26eqid%3D8ebf1ced0009426c000000026790a92c%22%5D; _pk_id.100001.4cf6=4220a061c440ae24.1737533747.; _pk_ses.100001.4cf6=1; ap_v=0,6.0; __utma=30149280.1136672349.1737533748.1737533748.1737533748.1; __utmb=30149280.0.10.1737533748; __utmc=30149280; __utmz=30149280.1737533748.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utma=223695111.917131669.1737533748.1737533748.1737533748.1; __utmb=223695111.0.10.1737533748; __utmc=223695111; __utmz=223695111.1737533748.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __yadk_uid=BM5RwClKvpaAdB2MTmPZlGznBX8mkjGF; _vwo_uuid_v2=DE6F3D864B4F28348AA9C8EBFE20D6B97|010518078ab09737a448765e97b29f8d',
        'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    }
    # ``limit`` is the page size, not an end offset: it must stay constant
    # while ``start`` advances, otherwise consecutive pages overlap.
    query = urllib.parse.urlencode({
        'start': (page - 1) * page_size,
        'limit': page_size,
    })
    return urllib.request.Request(url=base_url + query, headers=request_headers)

def getcontent(req):
    """Send *req* and return the response body decoded as UTF-8 text.

    Args:
        req: A ``urllib.request.Request`` (or URL string) to open.

    Returns:
        The full response body as ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response (and its connection) even if
    # reading or decoding fails.
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8')

def download(content,fname):
    """Write *content* to ``./download/<fname>.json`` as UTF-8 text.

    Args:
        content: Text to store.
        fname: File name without the ``.json`` extension.

    An existing file with the same name is overwritten.
    """
    # Download: write the content to a file.
    target = './download/%s.json' % fname
    # Create the output directory on first use instead of failing with
    # FileNotFoundError when it is missing.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    # ``with`` guarantees the handle is closed even if the write raises.
    with open(target, 'w', encoding='utf-8') as f:
        f.write(content)


# https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&
# start=0&limit=20

# Program entry point: fetch pages start_page..end_page (inclusive) and
# save each one as its own JSON file.
if __name__ == '__main__':
    start_page = 1
    end_page = 10
    for page in range(start_page, end_page + 1):
        req = douban(page)
        # Print the URL rather than the Request object, whose repr is opaque.
        print(req.full_url, page)
        content = getcontent(req)
        # One file per page: reusing a single name would overwrite the
        # previous page on every iteration, leaving only the last page.
        download(content, 'douban2_%d' % page)

# Tip: Ctrl + Alt + L reformats the saved JSON (editor shortcut)

